* Session setup: start from an empty session, turn off output paging, allow large
* matrices, and pin command interpretation to Stata 15.
* (The former `set mem' line was dropped: the `version 15' line below already requires
* Stata 15 or newer, and memory has been managed automatically since Stata 12, so the
* setting could never take effect.)
clear all
set more off
set matsize 10000
version 15

****************************************************************************
*** FUZZY RDROBUST, Census outcomes: splits on RGGVY treatment intensity ***
****************************************************************************

** Load the project's path definitions.
** The global path_code must already be defined when this script starts, because it is
** used to locate paths.do itself.
** NOTE(review): paths.do presumably defines the globals path, panel, rggvy and results
** used further down -- confirm.
do "$path_code/paths.do"

** Move to the analysis code folder, then make "fb" the graph scheme.
** The scheme is set permanently, i.e. it persists beyond this session.
cd "$path/code/analyze"
set scheme fb, permanently

****************************************************************** 
****************************************************************** 

{
* ---- Step 1: build district-level RGGVY administrative totals (10th Plan only) ----
use "$rggvy/rggvy_district_progress_X_XI_processed.dta", clear

	// Diagnostics only (nothing below survives the collapse): numbers of distinct
	// states, districts and DPR codes, by plan-by-implementer group and by plan
egen dt_group = group(st_code dt_code)
egen temp_group = group(plan implement_type)
unique st_code, by(temp_group) gen(uniq_st)
unique dt_group, by(temp_group) gen(uniq_dt)
unique dpr_code, by(temp_group) gen(uniq_dpr)
unique st_code if plan==10
unique st_code if plan==11
unique dt_group if plan==10
unique dt_group if plan==11

	// Award dates earlier than the sanction date are moved up to the sanction date
count if award_date<sanction_date
replace award_date = sanction_date if award_date<sanction_date
	// Plan-11 award dates are floored at 17553 (= 22jan2008 as a Stata daily date);
	// this touches only plan-11 rows, which are discarded on the next line
replace award_date = max(award_date,17553) if plan==11
keep if plan==10

	// One row per district: earliest/latest award date and summed costs/achievements
collapse (min) min_award_date=award_date (max) max_award_date=award_date ///
	(sum) award_cost total_released achiev_UDE achiev_ELEC achiev_BPL ///
	, by(st_code dt_code)
	// Midpoint between first and last award date, rounded to a whole day
generate med_award_date = round((min_award_date+max_award_date)/2,1)
format med_award_date %td
tempfile admin
save `admin'

* ---- Step 2: load the village panel and attach the district totals ----
use "$panel/panel_dataset_full.dta", clear
	// Many villages per district; _merge is left in the data and is not checked here
merge m:1 st_code dt_code using `admin'

	// RGGVY admin splitter 1: (# villages treated) / (# villages in district)
	// Two denominators are used: villages with tot_p>=300 only, and all villages.
local treated_villages "(achiev_UDE + achiev_ELEC)"
local breaker_cut 1.4

	// Count of villages with tot_p>=300; mode() then copies that district-level count
	// onto every village row, including those below 300 where the raw count is missing
egen temp_n300_raw = count(vi_code) if tot_p>=300, by(st_code dt_code)
egen temp_n300 = mode(temp_n300_raw), by(st_code dt_code)
generate RGGVY_share_villages_300 = `treated_villages' / temp_n300

	// Same share using every village in the district
egen temp_nall = count(vi_code), by(st_code dt_code)
generate RGGVY_share_villages_all = `treated_villages' / temp_nall

	// One tagged row per district for district-level plots and summaries
egen temp_tag = tag(st_code dt_code)
twoway scatter RGGVY_share_villages_300 RGGVY_share_villages_all if temp_tag
tabulate state if temp_tag & RGGVY_share_villages_300>`breaker_cut' & RGGVY_share_villages_all!=. & corr_state==1

	// "Rule breakers": districts whose treated share among 300+ villages exceeds 1.4.
	// NOTE(review): a missing share also evaluates as >1.4 in Stata, so districts with a
	// missing share are flagged as rule breakers as well -- confirm this is intended.
generate RGGVY_rule_breaker = RGGVY_share_villages_300>`breaker_cut'
twoway scatter RGGVY_share_villages_300 RGGVY_share_villages_all if temp_tag & RGGVY_rule_breaker==0
summarize RGGVY_share_villages_all if temp_tag & RGGVY_rule_breaker==0, detail
summarize RGGVY_share_villages_300 if temp_tag & RGGVY_rule_breaker==0, detail
	// split on 60% of villages in district

	// Remove every variable whose name starts with "temp"
drop temp*

	// RGGVY admin splitter 2: (# BPL HHs treated) / (# village HHs in district)
	// Household totals (no_hh11) over villages with tot_p>=300; mode() copies the
	// district-level total onto every village row of the district
egen temp_hh300_raw = total(no_hh11) if tot_p>=300, by(st_code dt_code)
egen temp_hh300 = mode(temp_hh300_raw), by(st_code dt_code)
generate RGGVY_share_hh_300 = (achiev_BPL) / temp_hh300
	// Household totals over all villages in the district
egen temp_hhall = total(no_hh11), by(st_code dt_code)
generate RGGVY_share_hh_all = (achiev_BPL) / temp_hhall
	// District-level plots and summaries, with and without the rule-breaker districts
egen temp_tag = tag(st_code dt_code)
twoway scatter RGGVY_share_hh_300 RGGVY_share_hh_all if temp_tag
twoway scatter RGGVY_share_hh_300 RGGVY_share_hh_all if temp_tag & RGGVY_rule_breaker==0
summarize RGGVY_share_hh_all if temp_tag & RGGVY_rule_breaker==0, detail
summarize RGGVY_share_hh_300 if temp_tag & RGGVY_rule_breaker==0, detail
	// split on 10% of HHs in district
drop temp*

	// RGGVY admin splitter 3: (Rs allocated) / (# villages treated)
	// Computed as total_released divided by the sum of the two village achievement counts
egen temp_tag = tag(st_code dt_code)
generate RGGVY_lakh_per_v = (total_released) / (achiev_UDE + achiev_ELEC)
summarize RGGVY_lakh_per_v if temp_tag & RGGVY_rule_breaker==0, detail
	// split on 10 lakh per village
drop temp*

	// Keep only districts with high treatment intensity (by village): not a rule breaker
	// and at least 60% of all villages treated.
	// NOTE(review): a missing share would pass ">=0.6" by itself; such rows are excluded
	// here only through the RGGVY_rule_breaker==0 condition.
keep if RGGVY_rule_breaker==0 & RGGVY_share_villages_all>=0.6

	// Keep villages in RD sample
	// The first-stage sample adds corr_state==1 to the reduced-form conditions
local rd_core "vplan4<11 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1"
generate in_rf_sample = `rd_core'
generate in_fs_sample = `rd_core' & corr_state==1
keep if in_rf_sample==1 | in_fs_sample==1

	// Create state FEs (since RD robust doesn't let you pass them through)
drop if st_code==32 // Kerala, only 3 villages
tabulate st_code if in_fs_sample==1, generate(STFEfs)
drop STFEfs1 // to avoid collinearity

	// Create district FEs (since RD robust doesn't let you pass them through)
	// Districts in the list below are pooled into one catch-all FE (code 999) because
	// they have so few in-sample villages that they break rdrobust
local sparse_dts "62,63,64,70,91,175,180,181,202,308,309,320,490,493,494,495,504,505,508"
generate stdtFE = stdt
tabulate stdtFE state if inlist(stdtFE,`sparse_dts')
replace stdtFE = 999 if inlist(stdtFE,`sparse_dts')
tabulate stdtFE if in_fs_sample==1, generate(DTFEfs)
drop DTFEfs1 // to avoid collinearity

	// Keep only fs_sample
keep if in_fs_sample==1

	// Create block groups (for clustering): one id per district-by-block combination
egen stdtbk = group(stdt bk_code)

	// Create lights-difference variable, to identify crazy outliers
	// (absolute gap between the 2011 and 2001 fitted max-lights values)
generate lights_diff = abs(lights_max2011_hat - lights_max2001_hat)

	// Create population difference outcome variables
	// tot_p11_diff  : tot_p11 minus tot_p
	// tot_p11_diff2 : residual from regressing that change on tot_p with st_code absorbed,
	//                 estimated on (and so defined only for) villages with tot_p<1000
generate tot_p11_diff = tot_p11-tot_p
reghdfe tot_p11_diff tot_p if tot_p<1000, absorb(st_code) residuals(tot_p11_diff2)

	// Create macros for lists of outcome variables
	// (globals, so they remain visible inside do-files called later)
global vars_demo "tot_p11 tot_p11_diff tot_p11_diff2 pct_06_11 hpca11_h_size_avg lit_p_11"
global vars_labor_pct "work_pooled_ag_p_11 work_pooled_ag_m_11 work_pooled_ag_f_11 work_pooled_hh_p_11 work_pooled_hh_m_11 work_pooled_hh_f_11 work_pooled_ot_p_11 work_pooled_ot_m_11 work_pooled_ot_f_11"
global vars_assets "hpca11_assets_tel_l_m_b hpca11_assets_tv hpca11_assets_bic hpca11_assets_smm hpca11_assets_none"
global vars_hhold "hpca11_cook_good hpca11_msl_bad hpca11_mat_f_m hpca11_mat_r_gtbw hpca11_t_hh_d"
global vars_vd "vdp_com_d_phone_mob_11 vd_com_d_post_off_11 vd_fin_d_ac_soc_11 vd_wat_d_tubewell_11 pct_irr_11 pct_sown_11"

	// Create index of HPCA variables
	// Step 1: build the complements (1 - x) of two material variables, for both years
foreach yr in 11 01 {
	generate hpca`yr'_mat_r_not_gtbw = 1-hpca`yr'_mat_r_gtbw
}
foreach yr in 11 01 {
	generate hpca`yr'_mat_f_not_m = 1-hpca`yr'_mat_f_m
}

	// Step 2: components of the index (named by their hpca11 version)
local hpca_index_vars ///
	hpca11_t_hh_g hpca11_t_hh_r_g hpca11_t_hh_rc_g hpca11_owned hpca11_w_within ///
	hpca11_lat_premise hpca11_cook_good hpca11_cook_ih_kitchen hpca11_bank ///
	hpca11_assets_tel_l_m_b hpca11_assets_tv hpca11_assets_bic hpca11_assets_smm ///
	hpca11_hh_perm hpca11_mat_r_not_gtbw hpca11_mat_f_not_m

	// Step 3: z-score each component and its hpca01 counterpart, using the mean and
	// sd among villages with tot_p<=1000
foreach v of varlist `hpca_index_vars' {
	foreach yr in 11 01 {
		local vyr = subinstr("`v'","hpca11","hpca`yr'",1)
		quietly summarize `vyr' if tot_p<=1000
		generate Z`vyr' = (`vyr' - r(mean))/(r(sd))
	}
}

	// Step 4: index = row mean of the z-scores, one index per year
egen double index_hpca11 = rowmean(Zhpca11*)
egen double index_hpca01 = rowmean(Zhpca01*)
	// Removes every variable whose name starts with "Z"
drop Z*

	// Create index of VD variables
	// First list: components named with an _11 suffix whose _01 counterpart is looked up
	// by swapping the suffix
local vd_index_vars ///
	vd_com_d_phone_ll_11 vd_com_d_post_off_11 vd_edu_n_p_sch_11 vd_edu_n_m_sch_11 vd_edu_n_s_sch_11 ///
	vd_edu_n_s_s_sch_11 vd_edu_n_coll_11 vd_edu_n_tr_sch_11 vd_fin_d_ac_soc_11 vd_fin_d_bank_11 vd_fin_d_comm_bank_11 ///
	vd_fin_d_coop_bank_11 vd_geo_a_irr_11 vd_hea_n_alt_hosp_11 vd_hea_n_fw_cntr_11 vd_hea_n_mcw_cntr_11 vd_hea_n_ph_subcntr_11 ///
	vd_tra_d_bus_11 vd_wat_d_any_11
	// Second list: additional 2011 components
local vd_index_vars11 ///
	vdp_com_d_phone_mob_11 vdp_tra_d_auto_11 vdp_tra_d_aw_road_11 vdp_tra_d_rick_11 ///
	vdp_tra_d_taxi_11 vdp_tra_d_tractor_11 vdp_tra_d_van_11 pct_sown_11

	// z-score every component using the mean and sd among villages with tot_p<=1000.
	// The _01 counterpart is z-scored inside capture, so a component with no _01
	// version is skipped silently.
foreach v of varlist `vd_index_vars' `vd_index_vars11' {
	quietly summarize `v' if tot_p<=1000
	generate Z`v' = (`v' - r(mean))/(r(sd))
	local v01 = subinstr("`v'","_11","_01",1)
	capture {
		quietly summarize `v01' if tot_p<=1000
		generate Z`v01' = (`v01' - r(mean))/(r(sd))
	}
}

	// Index = row mean of the z-scores matched by the wildcards below.
	// NOTE(review): Zpct_sown_11 (and Zpct_sown_01, if it was created) does not start
	// with "Zvd", so it is z-scored above but never enters either index -- confirm
	// whether pct_sown was meant to be part of the index.
egen double index_vd11 = rowmean(Zvd*11)
egen double index_vd01 = rowmean(Zvd*01)
	// Removes every variable whose name starts with "Z"
drop Z*

global vars_index "work_p_11 index_hpca11 index_vd11"


	// Create variables to store regression results
	// These columns live in the village dataset itself; each regression later writes
	// its results into one row. The creation order below must not change: later code
	// addresses these columns by position, as a range starting at fuzzy_step.
generate fuzzy_step = .

	// Text columns describing the specification
foreach s in fuzzyvar yvar ifs controls_base control fe kernel bwmethod vce {
	generate `s' = ""
}

	// Numeric columns: estimates, inference, bandwidths and sample counts
foreach n in polynomial_order beta_conv beta_robust se_conv se_robust pval_conv pval_robust ///
	lci_conv uci_conv lci_robust uci_robust bw_lo bw_hi ///
	nobs_orig nobs_left nobs_right nobs_total ndist ymean {
	generate `n' = .
}

	// Text labels: sensitivity-step tag and sample-split tag
foreach s in ftag dprtag {
	generate `s' = ""
}


** Massive loop over RDROBUST outcomes and sensitivities
**
** For every endogenous variable and every outcome, this runs a fuzzy rdrobust at the
** tot_p cutoff of 299.5 for each selected sensitivity step, writes one row of results
** per successful regression into the results columns (fuzzy_step through dprtag), and
** then saves those rows to RDROBUST_outcomes_fuzzy_census_dpr.dta in the results
** folder: the first save of a run overwrites the file, every later save appends to it.
**
** Changes relative to the earlier version of this loop:
**   - dprtag is now reset and saved together with the other results columns
**     (it used to be filled in for every row and then dropped before saving);
**   - regressions that fail inside the capture block are reported instead of being
**     skipped without a trace;
**   - "first save" is tracked with a flag instead of being tied to the names of the
**     first outcome and first endogenous variable.

// Equals 1 until the results file has been written once in this run
local first_save = 1

// Loop over endogenous variables 
foreach fuzzy of varlist vdp_pwr_h_com_avg_11 lights_max2011_hat hpca11_msl_elec {
	
	// Loop over outcome variables
	// (outcome lists not currently included: vars_labor_pct01 vars_labor_cnt
	//  vars_labor_bonus vars_labor_bonus01)
	foreach y in $vars_demo $vars_labor_pct $vars_assets $vars_hhold $vars_vd $vars_index {

		// Prep to store results: blank out every results column.
		// Each column is either string or numeric, so one of the two replaces fails
		// on a type mismatch and is swallowed by capture.
		foreach v of varlist fuzzy_step-dprtag {
			cap replace `v' = ""
			cap replace `v' = .
		}
		local row = 0

		// Define outcome-specific stuff
		// Default control: the outcome's name with its first "11" replaced by "01"
		global yvar = "`y'"
		global control = subinstr("$yvar","11","01",1)
		if substr("$yvar",-4,4)=="1101" {
			global control = subinstr("$yvar","1101","01",1)
		}
		// NOTE(review): this do-file is not visible here; it presumably sets
		// outcome-specific globals and may override the control defined above -- confirm
		qui do "$path/code/analyze/RDROBUST_rf_outcomes_graphspecs.do"

		// Loop through sensitivities (steps 2, 3 and 6-10 are defined below but are
		// not in the list that is currently run)
		foreach fuzzy_step in 1 4 5 11 {

			// Reset RDROBUST defaults
			local ifs = "in_fs_sample==1 & pop_mismatch20==0 & lights_diff<20"
			local controls_base = "lights_max1998 lights_max1999 lights_max2000 lights_max2001 lights_max2002 lights_max2003 lights_max2004 lights_max2005" 
			local fe = "STFEfs*"
			local kernel = "tri"
			local bwmethod = "mserd"
			local vce = ""
			local poly = 1

			// Define step-specific RDROBUST settings
			if `fuzzy_step'==1 {
				local ftag = "preferred"
			}
			if `fuzzy_step'==2 {
				local ftag = "popmismatch"
				local ifs = "in_fs_sample==1 & lights_diff<20"
			}
			if `fuzzy_step'==3 {
				local ftag = "lights outliers"
				local ifs = "in_fs_sample==1 & pop_mismatch20==0"
			}
			if `fuzzy_step'==4 {
				local ftag = "epa kernel"
				local kernel = "epa"
			}
			if `fuzzy_step'==5 {
				local ftag = "uni kernel"
				local kernel = "uni"
			}
			if `fuzzy_step'==6 {
				local ftag = "no lights controls"
				local controls_base = ""
			}
			if `fuzzy_step'==7 {
				local ftag = "no FEs"
				local fe = ""
			}
			if `fuzzy_step'==8 {
				local ftag = "district FEs"
				local fe = "DTFEfs*"
			}
			if `fuzzy_step'==9 {
				local ftag = "nncluster by district"
				local vce = "vce(nncluster stdt)"
			}
			if `fuzzy_step'==10 {
				local ftag = "cluster by district"
				local vce = "vce(cluster stdt)"
			}
			if `fuzzy_step'==11 {
				local ftag = "CERRD bandwidth"
				local bwmethod = "cerrd"
			}

			// Label for the sample split used in this file (high village-share districts)
			local dprtag = "hi_vill"

			// Use capture since some might break RDROBUST
			cap {
			
				// Run fuzzy RD regression
				rdrobust $yvar tot_p if `ifs', c(299.5) fuzzy(`fuzzy') covs(`controls_base' $control `fe') kernel(`kernel') bwselect(`bwmethod') p(`poly') all `vce'

				// Generate in-sample indicator and store bandwidths
				cap drop temp_in_reg
				qui gen temp_in_reg = `ifs' & inrange(tot_p,299.5-e(h_l),299.5+e(h_r)) & ${yvar}!=.

				// Store results (the row counter advances only after rdrobust succeeded)
				local row = `row' + 1
				qui replace fuzzy_step = `fuzzy_step' in `row'
				qui replace fuzzyvar = "`fuzzy'" in `row'
				qui replace yvar = "$yvar" in `row'
				qui replace ifs = "`ifs'" in `row'
				qui replace controls_base = "`controls_base'" in `row'
				qui replace control = "$control" in `row'
				qui replace fe = "`fe'" in `row'
				qui replace kernel = e(kernel) in `row'
				qui replace bwmethod = e(bwselect) in `row'
				qui replace vce = "`vce'" in `row'
				qui replace polynomial_order = e(p) in `row'
				qui replace beta_conv = e(tau_cl) in `row'
				qui replace beta_robust = e(tau_bc) in `row'
				qui replace se_conv = e(se_tau_cl) in `row'
				qui replace se_robust = e(se_tau_rb) in `row'
				qui replace pval_conv = e(pv_cl) in `row'
				qui replace pval_robust = e(pv_rb) in `row'
				qui replace lci_conv = e(ci_l_cl) in `row'
				qui replace uci_conv = e(ci_r_cl) in `row'
				qui replace lci_robust = e(ci_l_rb) in `row'
				qui replace uci_robust = e(ci_r_rb) in `row'
				qui replace bw_lo = e(h_l) in `row'
				qui replace bw_hi = e(h_r) in `row'
				qui replace nobs_orig = e(N) in `row'
				qui replace nobs_left = e(N_h_l) in `row'
				qui replace nobs_right = e(N_h_r) in `row'
				qui replace nobs_total = e(N_h_l) + e(N_h_r) in `row'
				// Number of distinct districts inside the bandwidth
				qui unique stdt if temp_in_reg==1
				qui replace ndist = r(unique) in `row'
				// Outcome mean just below the cutoff, within the bandwidth
				qui sum $yvar if temp_in_reg==1 & tot_p<299.5
				qui replace ymean = r(mean) in `row'
				qui replace ftag = "`ftag'" in `row'
				qui replace dprtag = "`dprtag'" in `row'

			}	

			// Report (without stopping) any specification that failed above
			if _rc {
				di as error "rdrobust failed: `fuzzy'    $yvar    step `fuzzy_step'    rc = " _rc
			}

			// Intermediate output
			di "`fuzzy'    $yvar    `fuzzy_step'   "    c(current_time)

		}

		
		// Save results: keep only the results columns and the rows that were filled in
		preserve
		keep fuzzy_step-dprtag
		dropmiss, obs force
		// After the first save, stack the new rows under what is already on disk
		if !`first_save' {
			tempfile reg_results
			save `reg_results'
			clear
			append using "$results/RDROBUST_outcomes_fuzzy_census_dpr.dta" `reg_results'
		}
		duplicates drop
		compress
		save "$results/RDROBUST_outcomes_fuzzy_census_dpr.dta", replace
		restore
		local first_save = 0
		
	}

}

}

****************************************************************** 
****************************************************************** 

